Loading the essential libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Loading the datasets

# Build a vector holding the full path of every CSV file in the data folder.
# `pattern` is a regular expression (not a shell glob), so match a literal
# ".csv" extension anchored at the end of the file name.
file_pathes <- list.files(
  path = "G:\\My Drive\\Projects\\smokingandcaner\\datasets\\iarc",
  pattern = "\\.csv$",
  full.names = TRUE
)
# Fail early with a clear message instead of trying to read a missing path.
if (length(file_pathes) == 0) {
  stop("No CSV files were found in the data folder.", call. = FALSE)
}
# Import each file into its own object named dataset1, dataset2, ...
# seq_along() is used instead of 1:length(), which would iterate over c(1, 0)
# for an empty vector.
for (i in seq_along(file_pathes)) {
  assign(paste0("dataset", i), read_csv(file_pathes[i]))
}
## Rows: 138 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 134 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 138 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 134 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 68 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 110 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 134 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 136 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cancer label, Country label
## dbl (8): Cancer id, Population id, Sex, Type, Year, ASR (World), Crude rate,...
## lgl (1): Cumulative risk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Collect the names of the imported objects (dataset1, dataset2, ...) from the
# current environment. The pattern is anchored on both sides so that other
# objects whose names merely start with "dataset" -- including the `datasets`
# vector defined on this very line when the chunk is re-run -- are not matched.
datasets <- ls(pattern = "^dataset[0-9]+$")

# Fetch each data frame by name and stack them row-wise into a single data set.
combined_data <- bind_rows(lapply(datasets, get))
# Standardise the column names (e.g. "ASR (World)" becomes asr_world) so they
# can be referenced without backticks further down.
combined_data <- combined_data %>% clean_names()

Checking the data

## 
##         Australia            France             Japan       New Zealand 
##               138               134               138               134 
## Republic of Korea         Singapore    United Kingdom               USA 
##                68               110               134               136

Plotting

Male Mortality

# Keep only the rows coded sex == 1 (shown under the "Male Mortality" heading).
male_data <- filter(combined_data, sex == 1)
# One smoothed asr_world-versus-year trend line per country.
male_plot <- ggplot(data = male_data) +
  geom_smooth(mapping = aes(x = year, y = asr_world, colour = country_label))
# Convert the static ggplot into an interactive plotly widget.
ggplotly(male_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Female Mortality

# Keep only the rows coded sex == 2 (shown under the "Female Mortality" heading).
female_data <- filter(combined_data, sex == 2)
# One smoothed asr_world-versus-year trend line per country.
female_plot <- ggplot(data = female_data) +
  geom_smooth(mapping = aes(x = year, y = asr_world, colour = country_label))
# Convert the static ggplot into an interactive plotly widget.
ggplotly(female_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'